# Building a set from a list keeps one copy of each distinct value.
set(['DS', 'Productivity', 'DS', 'DS', 'Productivity', 'Productivity'])

{'DS', 'Productivity'}

#the setup
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from collections import Counter

def generate_data(w):
    """Build a synthetic binary-classification dataset as a DataFrame.

    Calls ``make_classification`` for 1000 samples with three informative
    features (no redundant ones), two classes and ``random_state=42``.

    Parameters
    ----------
    w : float
        Forwarded to ``make_classification`` as ``weights=[w]``.
        NOTE(review): per the scikit-learn docs this is the share of
        samples given to the first class, with the other class's share
        inferred -- confirm against the installed version.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_0``, ``feature_1``, ``feature_2`` and ``target``.
    """
    features, target = make_classification(
        n_samples=1000,
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_classes=2,
        weights=[w],
        random_state=42,
    )
    # Attach the label vector as a fourth column next to the features.
    table = np.column_stack((features, target))
    column_names = ['feature_0', 'feature_1', 'feature_2', 'target']
    return pd.DataFrame(table, columns=column_names)

def split_data(df):
    """Split ``df`` into train/test features and targets.

    The split holds out 20% of the rows for testing, is stratified on the
    ``target`` column and uses ``random_state=42``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``target`` column; every other column is treated
        as a feature.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    train_part, test_part = train_test_split(
        df,
        test_size=0.2,
        stratify=df['target'],
        random_state=42,
    )

    # Separate the label column from the features in each partition.
    X_train = train_part.drop(columns='target')
    y_train = train_part['target']
    X_test = test_part.drop(columns='target')
    y_test = test_part['target']

    return X_train, X_test, y_train, y_test

def train_model(train, target):
    """Fit a default ``LogisticRegression`` on the data and return it.

    Parameters
    ----------
    train
        Feature matrix passed to ``fit``.
    target
        Labels passed to ``fit``.
    """
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return LogisticRegression().fit(train, target)

# "Disease" scenario: generate_data forwards 0.99 as the class weight, giving a
# heavily imbalanced dataset.
disease_df = generate_data(0.99)

# Stratified 80/20 split (see split_data); binds the shared X_*/y_* names.
X_train, X_test, y_train, y_test = split_data(disease_df)

# Instantiate and fit our model.
disease_model = train_model(X_train, y_train)

# Get the predictions for our test set.
y_preds = disease_model.predict(X_test)

# Fraction of test rows predicted correctly (shown as the cell's output).
accuracy_score(y_test, y_preds)

0.985

#Get a frequency count of our predictions
Counter(y_preds)

Counter({0.0: 200})

# Confusion matrix for the disease model's test predictions, labelled so that
# rows read as the actual class and columns as the predicted class.
cm = pd.DataFrame(
    data=confusion_matrix(y_test, y_preds),
    index=['Actual False', 'Actual True'],
    columns=['Predicted False', 'Predicted True'],
)
cm

from sklearn.metrics import precision_score, recall_score

# Every test prediction is class 0 (see the Counter output above), so there are
# no predicted positives and precision is reported as 0.0.
precision_score(y_test, y_preds)

0.0

# None of the actual positives were predicted, so recall is 0.0 as well.
recall_score(y_test, y_preds)

0.0

# "Football" scenario: same pipeline, but with 0.65 forwarded as the class
# weight, so the classes are far less imbalanced than in the disease data.
football_df = generate_data(0.65)
# Rebinds the shared X_*/y_* names to the football split.
X_train, X_test, y_train, y_test = split_data(football_df)

# Instantiate and fit our model.
football_model = train_model(X_train, y_train)

# Predictions for the football test set (overwrites the disease y_preds).
y_preds = football_model.predict(X_test)

# Fraction of test rows predicted correctly (shown as the cell's output).
accuracy_score(y_test, y_preds)

0.93

# Football-model confusion matrix: rows are the actual class, columns the
# predicted class.
pd.DataFrame(
    data=confusion_matrix(y_test, y_preds),
    index=['Actual False', 'Actual True'],
    columns=['Predicted False', 'Predicted True'],
)

# Per-class precision, recall, F1 and support as one text table.
print(classification_report(y_test, y_preds))

              precision    recall  f1-score   support

         0.0       0.93      0.97      0.95       130
         1.0       0.94      0.86      0.90        70

    accuracy                           0.93       200
   macro avg       0.93      0.91      0.92       200
weighted avg       0.93      0.93      0.93       200

def _mark_point(label, fpr, tpr, **style):
    """Draw one bold-labelled round marker at (fpr, tpr) on the current axes."""
    plt.plot(fpr, tpr, marker="o", markersize=10, **style)
    plt.annotate(label, (fpr, tpr), weight='bold')


# Illustration of "ROC space": a corner-to-corner diagonal (drawn in axes
# coordinates) plus a handful of labelled example points at (FPR, TPR).
f, ax = plt.subplots(figsize=(6, 6))
ax.plot([0, 1], [0, 1], transform=ax.transAxes)
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])

# Drawn in the original order so markers without an explicit colour pick up
# the same default colours as before.
_mark_point("D", 0, 1)
_mark_point("A", 0.2, 0.8, color='red')
_mark_point("B", 0.4, 0.85, color='blue')
_mark_point("C", 0.7, 0.7)
_mark_point("E", 0.8, 0.2)
_mark_point("F", 0, 0)
_mark_point("G", 1, 1)

plt.xlabel('FPR', weight='bold')
plt.ylabel('TPR', weight='bold')
plt.title('ROC space', weight='bold')

plt.show()

# We've talked about it enough -- now let's actually plot it.

from sklearn.metrics import RocCurveDisplay

# Re-create the football split; split_data fixes random_state, so this yields
# the same train/test partition the football model was fitted on.
X_train, X_test, y_train, y_test = split_data(football_df)

# Plot the ROC curve of the fitted model on the held-out test set.
RocCurveDisplay.from_estimator(football_model, X_test, y_test)
plt.title('Football Model ROC-CURVE', weight= 'bold')

Text(0.5, 1.0, 'Football Model ROC-CURVE')

# Wow, our football model is pretty good in terms of ROC and AUC.
# Let's check out our disease model.

# X_test/y_test currently hold the football split, so rebuild the disease one
# (deterministic because split_data fixes random_state).
X_train, X_test, y_train, y_test = split_data(disease_df)

# Plot the ROC curve of the disease model on its held-out test set.
RocCurveDisplay.from_estimator(disease_model, X_test, y_test)
plt.title('Disease Model ROC-CURVE', weight= 'bold')

Text(0.5, 1.0, 'Disease Model ROC-CURVE')

from sklearn.metrics import PrecisionRecallDisplay

# Switch the shared X_*/y_* names back to the football split (the previous
# cell left them pointing at the disease data).
X_train, X_test, y_train, y_test = split_data(football_df)

# Plot the precision-recall curve of the football model on its test set.
PrecisionRecallDisplay.from_estimator(football_model, X_test, y_test)
plt.title('Football Model PR-CURVE', weight= 'bold')

Text(0.5, 1.0, 'Football Model PR-CURVE')

# The disease model.

# Rebuild the disease split so X_test/y_test match the model being plotted.
X_train, X_test, y_train, y_test = split_data(disease_df)

# Plot the precision-recall curve of the disease model on its test set.
PrecisionRecallDisplay.from_estimator(disease_model, X_test, y_test)
plt.title('Disease Model PR-CURVE', weight= 'bold')

Text(0.5, 1.0, 'Disease Model PR-CURVE')

# Preview the descending grid 1.00, 0.99, ..., 0.01 (the stop value 0 is
# excluded); the same grid feeds the example ROC curve below.
np.arange(1,0, -0.01)

array([1.  , 0.99, 0.98, 0.97, 0.96, 0.95, 0.94, 0.93, 0.92, 0.91, 0.9 ,
       0.89, 0.88, 0.87, 0.86, 0.85, 0.84, 0.83, 0.82, 0.81, 0.8 , 0.79,
       0.78, 0.77, 0.76, 0.75, 0.74, 0.73, 0.72, 0.71, 0.7 , 0.69, 0.68,
       0.67, 0.66, 0.65, 0.64, 0.63, 0.62, 0.61, 0.6 , 0.59, 0.58, 0.57,
       0.56, 0.55, 0.54, 0.53, 0.52, 0.51, 0.5 , 0.49, 0.48, 0.47, 0.46,
       0.45, 0.44, 0.43, 0.42, 0.41, 0.4 , 0.39, 0.38, 0.37, 0.36, 0.35,
       0.34, 0.33, 0.32, 0.31, 0.3 , 0.29, 0.28, 0.27, 0.26, 0.25, 0.24,
       0.23, 0.22, 0.21, 0.2 , 0.19, 0.18, 0.17, 0.16, 0.15, 0.14, 0.13,
       0.12, 0.11, 0.1 , 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02,
       0.01])

def _one_minus_tenth_power(values):
    """Return ``1 - v**10`` for every ``v`` in ``values`` as a plain list."""
    return [1 - (v ** 10) for v in values]


# Shared x-axis grid for every example curve: 0.00, 0.01, ..., 0.99.
x_s = np.arange(0, 1, 0.01)
_flat_ones = [1] * (len(x_s) - 1)

# ROC examples: the ideal curve starts at 0 and is 1 everywhere else; the
# "good" curve rises from 0 towards 1 along the grid.
ideal_roc_y = [0] + _flat_ones
good_roc_y = _one_minus_tenth_power(np.arange(1, 0, -0.01))

# PR examples: the ideal curve stays at 1 and drops to 0 at the last grid
# point; the "good" curve starts at 1 and falls off along the grid.
ideal_pr_y = _flat_ones + [0]
good_pr_y = _one_minus_tenth_power(np.arange(0, 1, 0.01))

def plot_curve_examples(y, ylab, xlab, title, ax, x=x_s):
    """Draw one example curve on ``ax`` with a bold title and axis labels.

    Parameters
    ----------
    y
        Y-values of the curve.
    ylab, xlab : str
        Axis labels.
    title : str
        Title of the subplot.
    ax
        Object the curve is drawn on; it must provide ``plot``,
        ``set_title``, ``set_xlabel`` and ``set_ylabel``.
    x
        X-values of the curve. Defaults to the module-level ``x_s`` grid
        as it was when this function was defined.
    """
    bold = {'weight': 'bold'}
    ax.plot(x, y)
    ax.set_xlabel(xlab, **bold)
    ax.set_ylabel(ylab, **bold)
    ax.set_title(title, **bold)

# The ideal ROC curve next to a merely good one.

# Create a fresh two-row figure first. Without it, `ax` is still the single
# Axes left over from the ROC-space plot, and indexing it (`ax[0]`) raises a
# TypeError.
fig, ax = plt.subplots(2, figsize=(15,15))
plot_curve_examples(ideal_roc_y, 'TPR', 'FPR', 'Ideal ROC Curve', ax[0])
plot_curve_examples(good_roc_y, 'TPR', 'FPR', 'Good ROC Curve', ax[1])

# Ideal and good PR curves, stacked in a new two-row figure
# (`ax` is rebound to that figure's pair of subplots).
fig, ax = plt.subplots(2, figsize=(15,15))
plot_curve_examples(ideal_pr_y, 'Precision', 'Recall', 'Ideal PR Curve', ax[0])
plot_curve_examples(good_pr_y, 'Precision', 'Recall', 'Good PR Curve', ax[1])

About

Writing

Reading list

Evaluating Classifiers¶

Context¶

Evaluating our model¶

The problem with JUST accuracy¶

Different types of errors¶

The confusion Matrix¶

Different error metrics¶

Moving forward, I will slightly adjust the context to illustrate different concepts related to evaluating classifiers¶

Let's ROC n Roll¶

ROC CURVE¶

AUC¶

Precision - Recall Curve¶

Good and Bad curve examples¶

Why?¶

Why?¶

A Little note on the PR curve¶

Further links to some videos/articles/references/chapters in books¶

These articles are subject to revision because learning is a journey¶

	Predicted False	Predicted True
Actual False	197	0
Actual True	3	0

	Predicted False	Predicted True
Actual False	126	4
Actual True	10	60